"""
Elasticsearch utilities for indexing and managing documents.
"""

import logging
import os
from typing import Any, Dict, List, Optional

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

logger = logging.getLogger(__name__)


class ElasticsearchManager:
    """
    Utility class for managing Elasticsearch indices and documents.

    This class provides methods for creating indices, indexing documents,
    and performing other Elasticsearch management tasks.
    """

    def __init__(
        self,
        hosts: List[str] = ["http://localhost:9200"],
        username: Optional[str] = None,
        password: Optional[str] = None,
        api_key: Optional[str] = None,
        cloud_id: Optional[str] = None,
    ):
        """
        Initialize the Elasticsearch manager.

        Args:
            hosts: List of Elasticsearch hosts
            username: Optional username for authentication
            password: Optional password for authentication
            api_key: Optional API key for authentication
            cloud_id: Optional Elastic Cloud ID
        """
        # Initialize the Elasticsearch client
        es_args = {}

        # Basic authentication
        if username and password:
            es_args["basic_auth"] = (username, password)

        # API key authentication
        if api_key:
            es_args["api_key"] = api_key

        # Cloud ID for Elastic Cloud
        if cloud_id:
            es_args["cloud_id"] = cloud_id

        # Connect to Elasticsearch
        self.client = Elasticsearch(hosts, **es_args)

        # Verify connection
        try:
            info = self.client.info()
            logger.info(
                f"Connected to Elasticsearch cluster: {info.get('cluster_name')}"
            )
            logger.info(
                f"Elasticsearch version: {info.get('version', {}).get('number')}"
            )
        except Exception as e:
            logger.error(f"Failed to connect to Elasticsearch: {str(e)}")
            raise ConnectionError(
                f"Could not connect to Elasticsearch: {str(e)}"
            )

    def create_index(
        self,
        index_name: str,
        mappings: Optional[Dict[str, Any]] = None,
        settings: Optional[Dict[str, Any]] = None,
    ) -> bool:
        """
        Create an Elasticsearch index with optional mappings and settings.

        Args:
            index_name: Name of the index to create
            mappings: Optional mappings for the index fields
            settings: Optional settings for the index

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            # Check if index already exists
            if self.client.indices.exists(index=index_name):
                logger.warning(
                    f"Index '{index_name}' already exists - skipping creation"
                )
                return True

            # Default mappings for better text search if none provided
            if mappings is None:
                mappings = {
                    "properties": {
                        "title": {
                            "type": "text",
                            "analyzer": "standard",
                            "fields": {
                                "keyword": {
                                    "type": "keyword",
                                    "ignore_above": 256,
                                }
                            },
                        },
                        "content": {"type": "text", "analyzer": "standard"},
                        "url": {"type": "keyword"},
                        "source": {"type": "keyword"},
                        "timestamp": {"type": "date"},
                        "metadata": {"type": "object", "enabled": True},
                    }
                }

            # Default settings if none provided
            if settings is None:
                settings = {
                    "number_of_shards": 1,
                    "number_of_replicas": 0,
                    "analysis": {
                        "analyzer": {"standard": {"type": "standard"}}
                    },
                }

            # Create the index with mappings and settings
            create_response = self.client.indices.create(
                index=index_name,
                mappings=mappings,
                settings=settings,
            )

            logger.info(f"Created index '{index_name}': {create_response}")
            return True

        except Exception as e:
            logger.error(f"Error creating index '{index_name}': {str(e)}")
            return False

    def delete_index(self, index_name: str) -> bool:
        """
        Delete an Elasticsearch index.

        Args:
            index_name: Name of the index to delete

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            # Check if index exists
            if not self.client.indices.exists(index=index_name):
                logger.warning(
                    f"Index '{index_name}' does not exist - skipping deletion"
                )
                return True

            # Delete the index
            delete_response = self.client.indices.delete(index=index_name)
            logger.info(f"Deleted index '{index_name}': {delete_response}")
            return True

        except Exception as e:
            logger.error(f"Error deleting index '{index_name}': {str(e)}")
            return False

    def index_document(
        self,
        index_name: str,
        document: Dict[str, Any],
        document_id: Optional[str] = None,
        refresh: bool = False,
    ) -> Optional[str]:
        """
        Index a single document in Elasticsearch.

        Args:
            index_name: Name of the index to add the document to
            document: The document to index
            document_id: Optional document ID (will be generated if not provided)
            refresh: Whether to refresh the index after indexing

        Returns:
            str: Document ID if successful, None otherwise
        """
        try:
            # Index the document
            response = self.client.index(
                index=index_name,
                document=document,
                id=document_id,
                refresh=refresh,
            )

            logger.info(
                f"Indexed document in '{index_name}' with ID: {response['_id']}"
            )
            return response["_id"]

        except Exception as e:
            logger.error(f"Error indexing document in '{index_name}': {str(e)}")
            return None

    def bulk_index_documents(
        self,
        index_name: str,
        documents: List[Dict[str, Any]],
        id_field: Optional[str] = None,
        refresh: bool = False,
    ) -> int:
        """
        Bulk index multiple documents in Elasticsearch.

        Args:
            index_name: Name of the index to add the documents to
            documents: List of documents to index
            id_field: Optional field in the documents to use as the document ID
            refresh: Whether to refresh the index after indexing

        Returns:
            int: Number of successfully indexed documents
        """
        try:
            # Prepare the bulk actions
            actions = []
            for doc in documents:
                action = {
                    "_index": index_name,
                    "_source": doc,
                }

                # Use the specified field as the document ID if provided
                if id_field and id_field in doc:
                    action["_id"] = doc[id_field]

                actions.append(action)

            # Execute the bulk indexing
            success, failed = bulk(
                self.client,
                actions,
                refresh=refresh,
                stats_only=True,
            )

            logger.info(
                f"Bulk indexed {success} documents in '{index_name}', failed: {failed}"
            )
            return success

        except Exception as e:
            logger.error(
                f"Error bulk indexing documents in '{index_name}': {str(e)}"
            )
            return 0

    def index_file(
        self,
        index_name: str,
        file_path: str,
        content_field: str = "content",
        title_field: Optional[str] = "title",
        extract_metadata: bool = True,
        refresh: bool = False,
    ) -> Optional[str]:
        """
        Index a file in Elasticsearch, extracting text content and metadata.

        Args:
            index_name: Name of the index to add the document to
            file_path: Path to the file to index
            content_field: Field name to store the file content
            title_field: Field name to store the file title (filename if not specified)
            extract_metadata: Whether to extract file metadata
            refresh: Whether to refresh the index after indexing

        Returns:
            str: Document ID if successful, None otherwise
        """
        try:
            from langchain_community.document_loaders import (
                UnstructuredFileLoader,
            )

            # Extract file content and metadata
            loader = UnstructuredFileLoader(file_path)
            documents = loader.load()

            # Combine all content from the documents
            content = "\n\n".join([doc.page_content for doc in documents])

            # Get the filename for the title
            filename = os.path.basename(file_path)
            title = filename

            # Prepare the document
            document = {
                content_field: content,
            }

            # Add title if requested
            if title_field:
                document[title_field] = title

            # Add metadata if requested
            if extract_metadata and documents:
                # Include metadata from the first document
                document["metadata"] = documents[0].metadata

                # Add file-specific metadata
                document["source"] = file_path
                document["file_extension"] = os.path.splitext(filename)[
                    1
                ].lstrip(".")
                document["filename"] = filename

            # Index the document
            return self.index_document(index_name, document, refresh=refresh)

        except ImportError:
            logger.error(
                "UnstructuredFileLoader not available. Please install the 'unstructured' package."
            )
            return None
        except Exception as e:
            logger.error(f"Error indexing file '{file_path}': {str(e)}")
            return None

    def index_directory(
        self,
        index_name: str,
        directory_path: str,
        file_patterns: List[str] = ["*.txt", "*.pdf", "*.docx", "*.md"],
        content_field: str = "content",
        title_field: str = "title",
        extract_metadata: bool = True,
        refresh: bool = False,
    ) -> int:
        """
        Index all matching files in a directory in Elasticsearch.

        Args:
            index_name: Name of the index to add the documents to
            directory_path: Path to the directory containing files to index
            file_patterns: List of file patterns to match (glob patterns)
            content_field: Field name to store the file content
            title_field: Field name to store the file title
            extract_metadata: Whether to extract file metadata
            refresh: Whether to refresh the index after indexing

        Returns:
            int: Number of successfully indexed files
        """
        try:
            import glob

            # Find all matching files
            all_files = []
            for pattern in file_patterns:
                pattern_path = os.path.join(directory_path, pattern)
                matching_files = glob.glob(pattern_path)
                all_files.extend(matching_files)

            logger.info(
                f"Found {len(all_files)} files matching patterns {file_patterns} in {directory_path}"
            )

            # Index each file
            successful_count = 0
            for file_path in all_files:
                logger.info(f"Indexing file: {file_path}")
                doc_id = self.index_file(
                    index_name=index_name,
                    file_path=file_path,
                    content_field=content_field,
                    title_field=title_field,
                    extract_metadata=extract_metadata,
                    refresh=refresh,
                )

                if doc_id:
                    successful_count += 1

            logger.info(
                f"Successfully indexed {successful_count} files out of {len(all_files)}"
            )
            return successful_count

        except Exception as e:
            logger.error(
                f"Error indexing directory '{directory_path}': {str(e)}"
            )
            return 0

    def search(
        self,
        index_name: str,
        query: str,
        fields: List[str] = ["content", "title"],
        size: int = 10,
        highlight: bool = True,
    ) -> Dict[str, Any]:
        """
        Search for documents in Elasticsearch.

        Args:
            index_name: Name of the index to search
            query: Search query
            fields: Fields to search in
            size: Maximum number of results to return
            highlight: Whether to include highlighted excerpts in results

        Returns:
            Dict: Elasticsearch search response
        """
        try:
            search_query = {
                "query": {
                    "multi_match": {
                        "query": query,
                        "fields": fields,
                        "type": "best_fields",
                        "tie_breaker": 0.3,
                    }
                },
                "size": size,
            }

            # Add highlighting if requested
            if highlight:
                search_query["highlight"] = {
                    "fields": {field: {} for field in fields},
                    "pre_tags": ["<em>"],
                    "post_tags": ["</em>"],
                }

            # Execute the search
            response = self.client.search(
                index=index_name,
                body=search_query,
            )

            return response

        except Exception as e:
            logger.error(f"Error searching index '{index_name}': {str(e)}")
            return {"error": str(e)}
